Presence-only data

Author

Florencia Grattarola

Published

September 11, 2024

Code
library(plotly)
library(packcircles)
library(ggiraph)
library(countrycode)
library(tmap)
tmap_mode('view')
library(sf)
sf::sf_use_s2(FALSE)
library(tidyverse)

Data

List of species

# Reference list of carnivore species; later chunks use its `species` and
# `family` columns.
carnivores <- readr::read_csv(file = "data/carnivores.csv")

Number of carnivore species in the Neotropics = 71

Countries

# Country polygons, read from the 'countries' layer of the GeoPackage.
# `quiet = TRUE` is spelled out: `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved constant.
countries <- st_read("data/latam.gpkg", layer = "countries", quiet = TRUE)

Number of countries in the Neotropics = 21

Presence-only records

# Raw presence-only (PO) table as stored on disk.
PO <- read_csv("data/data_PO.csv")

# Spatial version of the same table: points are built from the
# decimalLongitude (x) and decimalLatitude (y) columns and tagged with
# EPSG:4326 as their coordinate reference system.
data_PO <- st_set_crs(
  st_as_sf(PO, coords = c("decimalLongitude", "decimalLatitude")),
  4326
)

Number of PO records = 60179
Number of species represented in the PO data = 60 (84.5 %)

Sources

Top 10 data publishers (institutions)

Code
# Ten institutions with the most records, rendered as a table.
# Rows without an institutionCode are excluded from the ranking.
# The `%` column is computed after the top ten are selected, so each value is
# the institution's share of the top-ten total, not of all PO records.
data_PO %>%
  st_drop_geometry() %>%
  count(institutionCode) %>%
  filter(!is.na(institutionCode)) %>%
  arrange(desc(n)) %>%
  slice_head(n = 10) %>%
  mutate(`%` = formattable::percent(n / sum(n))) %>%
  kableExtra::kbl()
institutionCode n %
iNaturalist 19439 47.08%
Corporación Nacional Forestal (CONAF) 8031 19.45%
APN-AR 2779 6.73%
MMA:GEFMONT 1871 4.53%
Asociación Colombiana de Zoología (ACZ) 1849 4.48%
Ecopetrol S.A. 1816 4.40%
MAATE 1467 3.55%
IBUNAM 1438 3.48%
Parques Nacionales Naturales de Colombia (PNN) 1320 3.20%
Instituto de Investigación de Recursos Biológicos Alexander von Humboldt (IAvH) 1277 3.09%

Number of datasets involved in the PO data = 434

Geographic coverage

Code
# Static (non-interactive) maps for this figure.
tmap_mode("plot")

# One panel per countryCode: record points shaded by year on a grey palette,
# with the country borders drawn on top. The legend is placed outside the
# panels. `TRUE` replaces the reassignable shorthand `T`.
tm_shape(data_PO) +
  tm_dots(col = "year", size = 0.1, palette = "Greys") +
  tm_facets(by = "countryCode") +
  tm_shape(countries) +
  tm_borders(col = "grey60", alpha = 0.5) +
  tm_layout(asp = 0, legend.outside = TRUE, legend.outside.size = 0.1)

Code
# Records and distinct species per country, rendered as a table.
# The ISO2 code is first translated to a country name for grouping, then the
# name is translated back to ISO2 so both can be shown side by side.
data_PO %>%
  st_drop_geometry() %>%
  mutate(
    Country = countrycode(
      countryCode,
      origin = "iso2c",
      destination = "country.name"
    )
  ) %>%
  group_by(Country) %>%
  summarise(
    `Number of records` = n(),
    `Number of species` = n_distinct(scientificName)
  ) %>%
  mutate(
    code = countrycode(
      Country,
      origin = "country.name",
      destination = "iso2c"
    )
  ) %>%
  relocate(Country, code) %>%
  kableExtra::kbl()
Country code Number of records Number of species
Argentina AR 4828 24
Belize BZ 215 14
Bolivia BO 145 19
Brazil BR 5356 26
Chile CL 10632 11
Colombia CO 12780 31
Costa Rica CR 3029 21
Ecuador EC 2922 27
El Salvador SV 55 10
French Guiana GF 1172 15
Guatemala GT 309 16
Guyana GY 29 14
Honduras HN 121 14
Mexico MX 15409 29
Nicaragua NI 47 13
Panama PA 428 16
Paraguay PY 782 16
Peru PE 747 21
Suriname SR 953 11
Uruguay UY 189 7
Venezuela VE 31 11

Number of countries represented in the PO data = 21

Taxonomic coverage

Code
# Per-family totals: number of records and number of distinct species.
data <- data_PO %>%
  st_drop_geometry() %>%
  group_by(family) %>%
  summarise(
    n_records = n(),
    n_species = n_distinct(scientificName)
  )

# Circle-packing layout with one circle per family, sized (by area) from the
# family's record count.
packing <- circleProgressiveLayout(data$n_records, sizetype = "area")

# Attach the layout columns to the family table *before* shrinking the radii,
# so `data` keeps the layout as originally computed (its x/y are used for the
# text labels below).
data <- cbind(data, packing)

# Draw each circle at 95% of its packed radius.
packing$radius <- 0.95 * packing$radius

# Polygon outline (50 points per circle) for every circle in the layout;
# `id` identifies the circle each vertex belongs to.
dat.gg <- circleLayoutVertices(packing, npoints = 50)

# Hover text shown for each family.
data$text <- paste(
  "family: ", data$family,
  "\n", "records:", data$n_records,
  "\n", "species:", data$n_species
)

# Interactive bubbles (tooltip looked up by circle id) with the family name
# printed at each circle centre; axes and legend are hidden.
plot.gg <- ggplot() +
  geom_polygon_interactive(
    data = dat.gg,
    aes(
      x, y,
      group = id,
      fill = id,
      tooltip = data$text[id],
      data_id = id
    ),
    colour = "black",
    alpha = 0.6
  ) +
  scale_fill_continuous_interactive(type = "viridis") +
  geom_text(
    data = data,
    aes(x, y, label = gsub("Group_", "", family)),
    size = 3,
    color = "black"
  ) +
  theme_void() +
  theme(
    legend.position = "none",
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  ) +
  coord_equal()

# Wrap the ggplot in a girafe widget and display it.
x <- girafe(ggobj = plot.gg)
x
Code
# Number of records per species, listed family by family and rendered as a
# table.
data_PO %>%
  st_drop_geometry() %>%
  group_by(scientificName, family) %>%
  count() %>%
  arrange(family, scientificName, n) %>%
  rename(`Number of records` = n) %>%
  kableExtra::kbl()
scientificName family Number of records
Atelocynus microtis Canidae 41
Canis latrans Canidae 2288
Canis lupus Canidae 241
Cerdocyon thous Canidae 4332
Chrysocyon brachyurus Canidae 575
Lycalopex culpaeus Canidae 8561
Lycalopex fulvipes Canidae 72
Lycalopex grisea Canidae 80
Lycalopex gymnocerca Canidae 2087
Lycalopex sechurae Canidae 13
Lycalopex vetula Canidae 91
Speothos venaticus Canidae 60
Urocyon cinereoargenteus Canidae 2868
Vulpes macrotis Canidae 132
Herpailurus yagouaroundi Felidae 1171
Leopardus colocola Felidae 183
Leopardus geoffroyi Felidae 302
Leopardus guigna Felidae 509
Leopardus guttulus Felidae 34
Leopardus jacobitus Felidae 1
Leopardus pajeros Felidae 8
Leopardus pardalis Felidae 3371
Leopardus tigrinus Felidae 379
Leopardus wiedii Felidae 471
Lynx rufus Felidae 1055
Panthera onca Felidae 1230
Puma concolor Felidae 3999
Conepatus chinga Mephitidae 629
Conepatus leuconotus Mephitidae 406
Conepatus semistriatus Mephitidae 578
Mephitis macroura Mephitidae 569
Mephitis mephitis Mephitidae 210
Spilogale angustifrons Mephitidae 161
Spilogale gracilis Mephitidae 106
Spilogale pygmaea Mephitidae 16
Eira barbara Mustelidae 2957
Galictis cuja Mustelidae 374
Galictis vittata Mustelidae 196
Lontra canadensis Mustelidae 1
Lontra longicaudis Mustelidae 959
Lontra provocax Mustelidae 81
Neogale felipei Mustelidae 1
Neogale frenata Mustelidae 251
Pteronura brasiliensis Mustelidae 272
Taxidea taxus Mustelidae 154
Bassaricyon alleni Procyonidae 32
Bassaricyon gabbii Procyonidae 53
Bassaricyon medius Procyonidae 11
Bassaricyon neblina Procyonidae 26
Bassariscus astutus Procyonidae 1113
Bassariscus sumichrasti Procyonidae 63
Nasua narica Procyonidae 5127
Nasua nasua Procyonidae 2620
Nasua olivacea Procyonidae 275
Potos flavus Procyonidae 815
Procyon cancrivorus Procyonidae 2681
Procyon lotor Procyonidae 2098
Procyon pygmaeus Procyonidae 1
Tremarctos ornatus Ursidae 2753
Ursus americanus Ursidae 436

Species not covered

Code
# Species from the reference list that have no record in the PO data.
# Matching is an exact string comparison of `species` against
# `scientificName`.
# NOTE(review): the rendered tables in this document show
# "Leopardus jacobitus" among the PO records and "Leopardus jacobita" in this
# list; this looks like a spelling mismatch between the two sources. Confirm.
carnivores %>%
  filter(!(species %in% data_PO$scientificName)) %>%
  select(species, family) %>%
  arrange(family, species) %>%
  kableExtra::kbl()
species family
Leopardus braccatus Felidae
Leopardus emiliae Felidae
Leopardus fasciatus Felidae
Leopardus garleppi Felidae
Leopardus jacobita Felidae
Leopardus narinensis Felidae
Spilogale interrupta Mephitidae
Spilogale leucoparia Mephitidae
Spilogale yucatanensis Mephitidae
Enhydra lutris Mustelidae
Lyncodon patagonicus Mustelidae
Neogale africana Mustelidae

Temporal coverage

Code
# NOTE(review): `coef` is not referenced anywhere in the visible code; it is
# kept in case another chunk relies on it.
coef <- 150

# Shared input for both time-series plots, built once instead of repeating the
# same pipeline per plot. Each record gets:
#   - date_observed:    a Date parsed from its day, month and year columns
#   - records_per_year: number of records sharing its year
#   - species_per_year: number of distinct species recorded in its year
records_by_date <- data_PO %>%
  st_drop_geometry() %>%
  mutate(date_observed = dmy(str_c(day, month, year, sep = "-"))) %>%
  add_count(year, name = "records_per_year") %>%
  group_by(year) %>%
  mutate(species_per_year = n_distinct(scientificName)) %>%
  ungroup()

# Yearly record counts along the observation dates.
# The constant colour set on the layer takes precedence over the mapped
# "Records" label, so no colour legend is produced.
plot_records <- ggplot(records_by_date, aes(x = date_observed)) +
  geom_line(
    aes(y = records_per_year, col = "Records"),
    linewidth = 1,
    col = RColorBrewer::brewer.pal(9, "Set1")[2]
  ) +
  scale_y_continuous(n.breaks = 10) +
  scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
  theme_bw() +
  # The original call had a stray empty argument (`x='',, y=...`); removed.
  labs(x = "", y = "Number of records", col = "")

# Yearly distinct-species counts along the observation dates.
plot_species <- ggplot(records_by_date, aes(x = date_observed)) +
  geom_line(
    aes(y = species_per_year, col = "species"),
    linewidth = 1,
    col = RColorBrewer::brewer.pal(9, "Set1")[1]
  ) +
  scale_x_date(date_breaks = "2 years", date_labels = "%Y") +
  theme_bw() +
  # Same stray empty argument removed here.
  labs(x = "", y = "Number of species", col = "")

# Convert the records plot to an interactive plotly widget and display it.
plot_records <- ggplotly(plot_records)
plot_records
Code
# Convert the species plot to an interactive plotly widget and display it.
plot_species <- plotly::ggplotly(plot_species)
plot_species